
// A1_Create_dataset
//==============================================================================

// Description: This file creates the final dataset that is used for the estimation:
// - Removes outliers and creates/merges the instrument
// - Merges in demographics
// - Creates product files that are later read into R

clear
set more off
cd "D:\data_replication"

//==============================================================================


// Import and format instruments
//==============================================================================

// Feenstra-Romalis IV
//------------------------------------------------------------------------------

forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc7 = substr(pc8plus, 1, 7) 
sort declarant partner year quarter pc7
by declarant partner year quarter pc7: egen instrument_pc7 = mean(instrument_pc8plus)
by declarant partner year quarter pc7: keep if _n == 1
keep declarant partner year quarter pc7 instrument_pc7
rename pc7 pc8plus
rename instrument_pc7 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc7_`y'.dta, replace
}



forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc6 = substr(pc8plus, 1, 6) 
sort declarant partner year quarter pc6
by declarant partner year quarter pc6: egen instrument_pc6 = mean(instrument_pc8plus)
by declarant partner year quarter pc6: keep if _n == 1
keep declarant partner year quarter pc6 instrument_pc6
rename pc6 pc8plus
rename instrument_pc6 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc6_`y'.dta, replace
}


forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc5 = substr(pc8plus, 1, 5) 
sort declarant partner year quarter pc5
by declarant partner year quarter pc5: egen instrument_pc5 = mean(instrument_pc8plus)
by declarant partner year quarter pc5: keep if _n == 1
keep declarant partner year quarter pc5 instrument_pc5
rename pc5 pc8plus
rename instrument_pc5 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc5_`y'.dta, replace
}


forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc4 = substr(pc8plus, 1, 4) 
sort declarant partner year quarter pc4
by declarant partner year quarter pc4: egen instrument_pc4 = mean(instrument_pc8plus)
by declarant partner year quarter pc4: keep if _n == 1
keep declarant partner year quarter pc4 instrument_pc4
rename pc4 pc8plus
rename instrument_pc4 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc4_`y'.dta, replace
}


forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc3 = substr(pc8plus, 1, 3) 
sort declarant partner year quarter pc3
by declarant partner year quarter pc3: egen instrument_pc3 = mean(instrument_pc8plus)
by declarant partner year quarter pc3: keep if _n == 1
keep declarant partner year quarter pc3 instrument_pc3
rename pc3 pc8plus
rename instrument_pc3 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc3_`y'.dta, replace
}


forval y = 2003/2007 {
use data\alt_iv\Instrument_pc8plus_`y'.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc2 = substr(pc8plus, 1, 2) 
sort declarant partner year quarter pc2
by declarant partner year quarter pc2: egen instrument_pc2 = mean(instrument_pc8plus)
by declarant partner year quarter pc2: keep if _n == 1
keep declarant partner year quarter pc2 instrument_pc2
rename pc2 pc8plus
rename instrument_pc2 instrument_pc8plus
save robustness\alt_iv\1_data_format\Instrument_pc2_`y'.dta, replace
}


// Cost per Unit
//------------------------------------------------------------------------------

use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc7 = substr(pc8plus, 1, 7) 
sort pc7 year
by pc7 year: egen cost_unit_km_pc7 = mean(cost_unit_km_pc8plus)
by pc7 year: keep if _n == 1
keep pc7 year cost_unit_km_pc7
rename pc7 pc8plus
rename cost_unit_km_pc7 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc7.dta, replace


use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc6 = substr(pc8plus, 1, 6) 
sort pc6 year
by pc6 year: egen cost_unit_km_pc6 = mean(cost_unit_km_pc8plus)
by pc6 year: keep if _n == 1
keep pc6 year cost_unit_km_pc6
rename pc6 pc8plus
rename cost_unit_km_pc6 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc6.dta, replace


use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc5 = substr(pc8plus, 1, 5) 
sort pc5 year
by pc5 year: egen cost_unit_km_pc5 = mean(cost_unit_km_pc8plus)
by pc5 year: keep if _n == 1
keep pc5 year cost_unit_km_pc5
rename pc5 pc8plus
rename cost_unit_km_pc5 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc5.dta, replace


use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc4 = substr(pc8plus, 1, 4) 
sort pc4 year
by pc4 year: egen cost_unit_km_pc4 = mean(cost_unit_km_pc8plus)
by pc4 year: keep if _n == 1
keep pc4 year cost_unit_km_pc4
rename pc4 pc8plus
rename cost_unit_km_pc4 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc4.dta, replace


use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc3 = substr(pc8plus, 1, 3) 
sort pc3 year
by pc3 year: egen cost_unit_km_pc3 = mean(cost_unit_km_pc8plus)
by pc3 year: keep if _n == 1
keep pc3 year cost_unit_km_pc3
rename pc3 pc8plus
rename cost_unit_km_pc3 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc3.dta, replace


use data\alt_iv\cost_unit_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc2 = substr(pc8plus, 1, 2) 
sort pc2 year
by pc2 year: egen cost_unit_km_pc2 = mean(cost_unit_km_pc8plus)
by pc2 year: keep if _n == 1
keep pc2 year cost_unit_km_pc2
rename pc2 pc8plus
rename cost_unit_km_pc2 cost_unit_km_pc8plus
save robustness\alt_iv\1_data_format\cost_unit_km_pc2.dta, replace


// Cost per Dollar
//------------------------------------------------------------------------------

use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc7 = substr(pc8plus, 1, 7) 
sort pc7 year
by pc7 year: egen cost_dollar_km_pc7 = mean(cost_dollar_km_pc8plus)
by pc7 year: keep if _n == 1
keep pc7 year cost_dollar_km_pc7
rename pc7 pc8plus
rename cost_dollar_km_pc7 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc7.dta, replace


use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc6 = substr(pc8plus, 1, 6) 
sort pc6 year
by pc6 year: egen cost_dollar_km_pc6 = mean(cost_dollar_km_pc8plus)
by pc6 year: keep if _n == 1
keep pc6 year cost_dollar_km_pc6
rename pc6 pc8plus
rename cost_dollar_km_pc6 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc6.dta, replace


use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc5 = substr(pc8plus, 1, 5) 
sort pc5 year
by pc5 year: egen cost_dollar_km_pc5 = mean(cost_dollar_km_pc8plus)
by pc5 year: keep if _n == 1
keep pc5 year cost_dollar_km_pc5
rename pc5 pc8plus
rename cost_dollar_km_pc5 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc5.dta, replace


use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc4 = substr(pc8plus, 1, 4) 
sort pc4 year
by pc4 year: egen cost_dollar_km_pc4 = mean(cost_dollar_km_pc8plus)
by pc4 year: keep if _n == 1
keep pc4 year cost_dollar_km_pc4
rename pc4 pc8plus
rename cost_dollar_km_pc4 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc4.dta, replace


use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc3 = substr(pc8plus, 1, 3) 
sort pc3 year
by pc3 year: egen cost_dollar_km_pc3 = mean(cost_dollar_km_pc8plus)
by pc3 year: keep if _n == 1
keep pc3 year cost_dollar_km_pc3
rename pc3 pc8plus
rename cost_dollar_km_pc3 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc3.dta, replace


use data\alt_iv\cost_dollar_km_pc8plus.dta, clear
gen ll = length(pc8plus)
keep if ll == 8
gen pc2 = substr(pc8plus, 1, 2) 
sort pc2 year
by pc2 year: egen cost_dollar_km_pc2 = mean(cost_dollar_km_pc8plus)
by pc2 year: keep if _n == 1
keep pc2 year cost_dollar_km_pc2
rename pc2 pc8plus
rename cost_dollar_km_pc2 cost_dollar_km_pc8plus
save robustness\alt_iv\1_data_format\cost_dollar_km_pc2.dta, replace



use estimation\1_data_format\production_imports.dta, clear
gen ll = length(pc8plus)
gen pc7 = pc8plus
replace pc7 = substr(pc8plus, 1, 7) if ll == 8
gen pc6 = pc8plus
replace pc6 = substr(pc8plus, 1, 6) if ll == 8
gen pc5 = pc8plus
replace pc5 = substr(pc8plus, 1, 5) if ll == 8
gen pc4 = pc8plus
replace pc4 = substr(pc8plus, 1, 4) if ll == 8
gen pc3 = pc8plus
replace pc3 = substr(pc8plus, 1, 3) if ll == 8
gen pc2 = pc8plus
replace pc2 = substr(pc8plus, 1, 2) if ll == 8
save robustness\alt_iv\1_data_format\production_imports_detail.dta, replace

//==============================================================================


// Format Non-Composite Goods
//==============================================================================

set more off
forval j = 1/3817 {


// 8-Digit and Synthetic Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 8 | synth[1] == 1 {

merge 1:m pc8plus using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument


gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using data\alt_iv\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using data\alt_iv\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using data\alt_iv\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 7-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 7 & synth[1] != 1 {

rename pc8plus pc7
merge 1:m pc7 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc7 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc7 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc7 = imports_value_pc7 / imports_quantity_pc7
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc7 imports_value_pc8plus
rename imports_quantity_pc7 imports_quantity_pc8plus 
rename price_pc7 price_pc8plus
rename pc7 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument


gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc7_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc7.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc7.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 6-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 6 & synth[1] != 1 {

rename pc8plus pc6
merge 1:m pc6 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc6 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc6 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc6 = imports_value_pc6 / imports_quantity_pc6
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc6 imports_value_pc8plus
rename imports_quantity_pc6 imports_quantity_pc8plus 
rename price_pc6 price_pc8plus
rename pc6 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument


gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc6_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc6.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc6.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 5-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 5 & synth[1] != 1 {

rename pc8plus pc5
merge 1:m pc5 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc5 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc5 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc5 = imports_value_pc5 / imports_quantity_pc5
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc5 imports_value_pc8plus
rename imports_quantity_pc5 imports_quantity_pc8plus 
rename price_pc5 price_pc8plus
rename pc5 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc5_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc5.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc5.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 4-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 4 & synth[1] != 1 {

rename pc8plus pc4
merge 1:m pc4 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc4 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc4 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc4 = imports_value_pc4 / imports_quantity_pc4
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc4 imports_value_pc8plus
rename imports_quantity_pc4 imports_quantity_pc8plus 
rename price_pc4 price_pc8plus
rename pc4 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc4_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc4.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc4.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 3-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 3 & synth[1] != 1 {

rename pc8plus pc3
merge 1:m pc3 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc3 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc3 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc3 = imports_value_pc3 / imports_quantity_pc3
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc3 imports_value_pc8plus
rename imports_quantity_pc3 imports_quantity_pc8plus 
rename price_pc3 price_pc8plus
rename pc3 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc3_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc3.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc3.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 2-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
keep pc product_id
rename pc pc8plus
gen ll = length(pc8plus)
gen synth = 0
replace synth = 1 if strpos(pc8plus, ".")

if ll[1] == 2 & synth[1] != 1 {

rename pc8plus pc2
merge 1:m pc2 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc2 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc2 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc2 = imports_value_pc2 / imports_quantity_pc2
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll synth
rename imports_value_pc2 imports_value_pc8plus
rename imports_quantity_pc2 imports_quantity_pc8plus 
rename price_pc2 price_pc8plus
rename pc2 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
sort pc8plus
by pc8plus: egen price_mean = mean(price_pc8plus)
by pc8plus: egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc2_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc2.dta
keep if _merge == 3
drop _merge
gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc2.dta
keep if _merge == 3
drop _merge
gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort pc8plus declarant year quarter
by pc8plus declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by pc8plus declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort pc8plus year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by pc8plus year partner: egen price_sum = sum(price_pc8plus)
by pc8plus year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by pc8plus year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by pc8plus year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort pc8plus partner declarant
by pc8plus partner: egen price_sum = sum(price_pc8plus)
by pc8plus partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort pc8plus declarant year quarter
by pc8plus declarant year quarter: gen products = _N
drop if products == 1

sort pc8plus year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


}


//==============================================================================


// Format Composite Goods
//==============================================================================

set more off
forval j = 3818/4221 {


// 7-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 7 {

rename pc8plus pc7
merge 1:m pc7 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using data\alt_iv\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using data\alt_iv\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge


merge m:1 pc8plus year using data\alt_iv\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc7 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc7 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc7 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc7 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc7 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc7 = imports_value_pc7 / imports_quantity_pc7
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc7 imports_value_pc8plus
rename imports_quantity_pc7 imports_quantity_pc8plus 
rename price_pc7 price_pc8plus
rename iv_feenstra_pc7 iv_feenstra
rename cost_unit_km_pc7 cost_unit_km_pc8plus
rename cost_dollar_km_pc7 cost_dollar_km_pc8plus
rename pc7 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 6-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 6 {

rename pc8plus pc6
merge 1:m pc6 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc6 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc6 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc6 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc6 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc6 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc6 = imports_value_pc6 / imports_quantity_pc6
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc6 imports_value_pc8plus
rename imports_quantity_pc6 imports_quantity_pc8plus 
rename price_pc6 price_pc8plus
rename iv_feenstra_pc6 iv_feenstra
rename cost_unit_km_pc6 cost_unit_km_pc8plus
rename cost_dollar_km_pc6 cost_dollar_km_pc8plus
rename pc6 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 5-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 5 {

rename pc8plus pc5
merge 1:m pc5 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc5 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc5 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc5 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc5 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc5 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc5 = imports_value_pc5 / imports_quantity_pc5
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc5 imports_value_pc8plus
rename imports_quantity_pc5 imports_quantity_pc8plus 
rename price_pc5 price_pc8plus
rename iv_feenstra_pc5 iv_feenstra
rename cost_unit_km_pc5 cost_unit_km_pc8plus
rename cost_dollar_km_pc5 cost_dollar_km_pc8plus
rename pc5 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 4-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 4 {

rename pc8plus pc4
merge 1:m pc4 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc4 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc4 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc4 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc4 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc4 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc4 = imports_value_pc4 / imports_quantity_pc4
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc4 imports_value_pc8plus
rename imports_quantity_pc4 imports_quantity_pc8plus 
rename price_pc4 price_pc8plus
rename iv_feenstra_pc4 iv_feenstra
rename cost_unit_km_pc4 cost_unit_km_pc8plus
rename cost_dollar_km_pc4 cost_dollar_km_pc8plus
rename pc4 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 3-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 3 {

rename pc8plus pc3
merge 1:m pc3 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc3 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc3 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc3 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc3 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc3 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc3 = imports_value_pc3 / imports_quantity_pc3
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc3 imports_value_pc8plus
rename imports_quantity_pc3 imports_quantity_pc8plus 
rename price_pc3 price_pc8plus
rename iv_feenstra_pc3 iv_feenstra
rename cost_unit_km_pc3 cost_unit_km_pc8plus
rename cost_dollar_km_pc3 cost_dollar_km_pc8plus
rename pc3 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}


// 2-Digit  Categories
//------------------------------------------------------------------------------

use estimation\2_product_list\output\product_id_all.dta, clear
keep if product_id == `j'
expand 2 in 1
replace pc8plus = pc if _n == 1
keep pc8plus product_id
gen ll = length(pc8plus)

if ll[1] == 2 {

rename pc8plus pc2
merge 1:m pc2 using robustness\alt_iv\1_data_format\production_imports_detail.dta
keep if _merge == 3
drop _merge


// Shipping Cost Instrument

gen match = 0
gen iv_feenstra = .
forval y = 2003/2007 {
merge 1:1 pc8plus year quarter declarant partner using robustness\alt_iv\1_data_format\Instrument_pc8plus_`y'.dta
drop if _merge == 2
replace match = 1 if _merge == 3
replace iv_feenstra = instrument_pc8plus if _merge == 3
drop _merge instrument_pc8plus
}
replace iv_feenstra = 0 if partner == declarant
replace match = 1 if partner == declarant
drop if match == 0
drop if iv_feenstra == .
drop match


merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_unit_km_pc8plus.dta
keep if _merge == 3
drop _merge

merge m:1 pc8plus year using robustness\alt_iv\1_data_format\cost_dollar_km_pc8plus.dta
keep if _merge == 3
drop _merge


sort declarant partner year quarter
by declarant partner year quarter: egen imports_value_pc2 = sum(imports_value_pc8plus)
by declarant partner year quarter: egen imports_quantity_pc2 = sum(imports_quantity_pc8plus)
by declarant partner year quarter: egen iv_feenstra_pc2 = mean(iv_feenstra)
by declarant partner year quarter: egen cost_unit_km_pc2 = mean(cost_unit_km_pc8plus)
by declarant partner year quarter: egen cost_dollar_km_pc2 = mean(cost_dollar_km_pc8plus)
by declarant partner year quarter: keep if _n == 1
gen price_pc2 = imports_value_pc2 / imports_quantity_pc2
drop pc8plus imports_value_pc8plus imports_quantity_pc8plus price_pc8plus ll iv_feenstra cost_unit_km_pc8plus cost_dollar_km_pc8plus
rename imports_value_pc2 imports_value_pc8plus
rename imports_quantity_pc2 imports_quantity_pc8plus 
rename price_pc2 price_pc8plus
rename iv_feenstra_pc2 iv_feenstra
rename cost_unit_km_pc2 cost_unit_km_pc8plus
rename cost_dollar_km_pc2 cost_dollar_km_pc8plus
rename pc2 pc8plus


// Delete Outliers and Missing Observations
//------------------------------------------------------------------------------

drop if price_pc8plus == .
egen price_mean = mean(price_pc8plus)
egen price_median = median(price_pc8plus)
gen ratio_mean = price_pc8plus / price_mean 
gen ratio_median = price_pc8plus / price_median 
gen ratio_mean_reversed = price_mean / price_pc8plus  
gen ratio_median_reversed =  price_median / price_pc8plus
keep if ratio_median < 30
keep if ratio_median_reversed < 30
drop price_mean price_median ratio_mean ratio_median ratio_mean_reversed ratio_median_reversed


// Population Partner
merge m:1 year partner using data\population\population_partner.dta
keep if _merge == 3
drop _merge partner_description
rename population population_partner


//GDP per capita Partner
merge m:1 partner year using data\gdp_per_capita\gdp_per_capita_defl.dta
keep if _merge == 3
drop _merge
rename gdp_per_capita gdp_per_capita_partner
rename gdp_per_capita_defl gdp_per_capita_defl_partner

// Distance
merge m:1 declarant partner using data\distance\distance_declarant_partner.dta
keep if _merge == 3
drop _merge

// Developed Country Dummy
merge m:1 partner using data\country_codes\developed_country.dta
keep if _merge == 3
drop _merge

// Exchange Rates
merge m:1 partner year using data\exchange_rates\D_exchange_rate_partner.dta
keep if _merge == 3
drop _merge


gen iv_unit = cost_unit_km_pc8plus * distance
drop cost_unit_km_pc8plus

gen iv_dollar = cost_dollar_km_pc8plus * distance
drop cost_dollar_km_pc8plus


// Create Shares
sort declarant year quarter
by declarant year quarter: egen imports_quantity_pc8_total = sum(imports_quantity_pc8plus)
by declarant year quarter: egen imports_value_pc8_total = sum(imports_value_pc8plus)
gen share = imports_quantity_pc8plus / imports_quantity_pc8_total
gen exp_share = imports_value_pc8plus / imports_value_pc8_total


// Create Hausmann Instruments: Current Year
sort year partner declarant
gen price_pc8_weighted = price_pc8plus * share
by year partner: egen price_sum = sum(price_pc8plus)
by year partner: egen price_sum_weighted=sum(price_pc8_weighted)
by year partner: gen number_importers=_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_sum_ex_own_weighted = price_sum_weighted - price_pc8_weighted
by year partner: egen weight_sum = sum(share)
gen price_ex_own = price_sum_ex_own/(number_importers-1)
gen price_ex_own_weighted = price_sum_ex_own_weighted/(weight_sum-share)
drop if price_ex_own == .
drop if price_ex_own_weighted == .
drop weight_sum price_sum_ex_own_weighted price_sum_ex_own number_importers price_sum_weighted price_sum price_pc8_weighted


// Create Hausmann Instruments: All years
sort partner declarant
by partner: egen price_sum = sum(price_pc8plus)
by partner: gen number_importers =_N
gen price_sum_ex_own = price_sum - price_pc8plus
gen price_ex_own_all_years = price_sum_ex_own/(number_importers-1)
drop price_sum number_importers price_sum_ex_own
drop if price_ex_own_all_years == .

sort declarant year quarter
by declarant year quarter: gen products = _N
drop if products == 1

sort year quarter declarant share
order year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner 

keep year quarter declarant partner share exp_share price_pc8plus price_ex_own price_ex_own_weighted price_ex_own_all_years D_exchange_rate gdp_per_capita_partner gdp_per_capita_defl_partner distance developed_country population_partner products iv_feenstra iv_unit iv_dollar
egen market_id = group(year quarter declarant)
save robustness\alt_iv\1_data_format\product_files\data_product_`j'.dta, replace
outsheet using robustness\alt_iv\1_data_format\product_files\data_product_`j'.csv, comma replace

}

}
